library(tidyverse)
library(magrittr)
library(cowplot)
#Data Load In:
urine_raw <- read_csv('~/Documents/Projects/Short_Term_Broccoli/Data/SFN_Targeted/Urine/Urine_R_Format.csv')
metadata <- read_csv('~/Documents/Projects/Short_Term_Broccoli/Data/Metadata/Meta_urine.csv')
#Clean the metadata
mdata_clean <- metadata %>%
#Weight (grams) used as a proxy for mL, convert to L
mutate(across(c(4:9), ~.x/1000)) %>%
#Pivot the data longer
pivot_longer(cols = starts_with('vol'), names_to = 'time', values_to = 'volume') %>%
#Rename the variables so they can treated as factors
mutate(time = gsub('vol_', '', time)) %>%
mutate(time = gsub('h','', time))
#Clean the urine data
urine_clean <- urine_raw %>%
#Rename the time points so they match the metadata
mutate(time = gsub('h','', time)) %>%
#Join the data with the metadata
left_join(., mdata_clean) %>%
group_by(subject_id, time) %>%
#Multiply all the uM by the (proxy) volumes to convert to uMol recovered
mutate(across(starts_with('SFN'), ~.x*volume)) %>%
#Remove the alfalfa groups so we are only focusing on the broccoli
filter(treatment %in% c('BL', 'BU')) %>%
#Convert to factors for analysis + graphing
modify_at(c('subject_id', 'time', 'cohort'), as.factor)
#Relevel the time variables to make it work
urine_clean$time %<>% fct_relevel(c('0', '3', '6', '24', '48', '72'))
#Convert to tidy data for ease of analysis
urine_tidy <- urine_clean %>%
pivot_longer(cols = starts_with('SFN'), names_to = 'metabolite', values_to = 'uMol')
This is a preliminary analysis of the targeted SFN-metabolite data from the short term broccoli feeding study. The study was run between March 2021 and November 2021. Sample prepartion was completed by Dr. Carmen Wong, Mass Spectrometry analysis was conducted Dr. Jaweoo Choi, and data analysis by Yanni Bouranis.
This analysis is preliminary and still needs further refinement, however, it seeks to answer the following questions:
Do the label and the unlabeled groups differ for total SFN metabolite excretion?
Does total SFN metabolite excretion differ between cohorts?
Does the metabolism of SFN-NAC and SFN-NIT look similar to what we saw in Lauren Atwell’s data?
Which participants were the highest and lowest SFN-NIT and SFN-NAC excreters?
All plots are interactive and can be zoomed in on and can be hovered over to get more data about specific data points. The legend can be used to isolate points of interest. Single click an item in the legend to hide all members of that class and double click to hide all others but keep that one.
The first question we want to answer is if the labeled and unlabeled groups differ in the total SFN metabolites (SFN_Tot) excreted at each time point. Let’s start by plotting each time point:
We will plot our data as both a box and whiskers plot and a dotplot so we can better see inter-individual variation. For the dotplot, the mean for each treatment group is represented as a black diamond. Note: the y-axis scales are not the same to allow us to see differences across metabolites more easily.
DvH_box <- ggplot(urine_clean, aes(x = treatment, y = SFN_Tot, fill = treatment)) +
geom_boxplot() +
facet_wrap(~time, scales = 'free_y') +
xlab('Treatment') +
ylab('Total SFN Metabolite Excretion (µMol)') +
ggtitle('Total SFN Metabolite Excretion') +
scale_x_discrete(labels = c('Labeled', 'Unlabeled')) +
theme(legend.position = 'none')
plotly::ggplotly(DvH_box)
DvH_ind <- ggplot(urine_clean, aes(x = treatment, y = SFN_Tot, color = subject_id)) +
geom_point(aes(text = paste0('Participant ID: ', subject_id, '\n',
'Cohort: ', cohort, '\n',
'Total Excretion: ', SFN_Tot, ' µMol \n'))) +
geom_point(aes(group = time),stat = 'summary', fun = mean, fill = 'grey', color = 'black', alpha = 0.9, size = 3.6, shape = 18) +
facet_wrap(~time, scales = 'free_y') +
xlab('Treatment') +
ylab('Total SFN Metabolite Excretion (µMol)') +
ggtitle('Total SFN Metabolite Excretion') +
scale_x_discrete(labels = c('Labeled', 'Unlabeled')) +
scale_color_discrete(name = 'Participant \n ID')
plotly::ggplotly(DvH_ind, tooltip = 'text')
From our plots, it looks like there is not much difference across our groups. Let’s verify that by running some stats.
For ease of analysis, instead of using a linear mixed model we will stratify our data by time point and use a simple Mann-Whitney U-Test (nonparametric t-test) to compare between the Label and the Unlabeled group. The interpretation of these results is that a significant p-value denotes that there is a significant difference between the labeled- and unlabeled-samples for total SFN metabolite excretion at each time point.
labelstats <- urine_tidy %>%
filter(metabolite == 'SFN_Tot') %>%
group_by(time) %>%
nest() %>%
mutate(pval = map_dbl(data, function(x) wilcox.test(uMol ~ treatment, data = x)$p.val)) %>%
dplyr::select(-data) %>%
rename('Time' = time, 'p-Value' = pval)
knitr::kable(labelstats)
| Time | p-Value |
|---|---|
| 0 | 0.6547343 |
| 3 | 0.7985884 |
| 6 | 0.0850208 |
| 24 | 0.2836463 |
| 48 | 0.1458033 |
| 72 | 0.2797874 |
Unsurprisingly, there is no significant differences between our labeled and unlabeled groups at any time points.
Our next goal is to evaluate if there is a significant cohort effect occuring when it comes to total SFN metabolite excretion. The cohorts were run during different times of years which could influence the growth of the sprouts (i.e. light, heat conditions), as well as different biological conditions occuring such as the weight activity level, stress level, and water intake of our participants.
Once again we will plot our data as both a boxplot and dotplot. Note: the y-axis scales are not the same to allow us to see differences across metabolites more easily.
CoI_box <- ggplot(urine_clean, aes(x = cohort, y = SFN_Tot, fill = cohort)) +
geom_boxplot() +
facet_wrap(~time, scales = 'free_y') +
xlab('Cohort') +
ylab('Total SFN Metabolite Excretion (µMol)') +
ggtitle('Total SFN Metabolite Excretion') +
theme(legend.position = 'none')
plotly::ggplotly(CoI_box)
CoI_ind <- ggplot(urine_clean, aes(x = cohort, y = SFN_Tot, color = subject_id, group = cohort)) +
geom_point(aes(text = paste0('Participant ID: ', subject_id, '\n',
'Cohort: ', cohort, '\n',
'Total Excretion: ', SFN_Tot, ' µMol \n'))) +
geom_point(stat = 'summary', fun = mean, fill = 'grey', color = 'black', alpha = 0.9, size = 3.6, shape = 18) +
facet_wrap(~time, scales = 'free_y') +
xlab('Cohort') +
ylab('Total SFN Metabolite Excretion (µMol)') +
ggtitle('Total SFN Metabolite Excretion') +
scale_color_discrete(name = 'Participant \n ID')
plotly::ggplotly(CoI_ind, tooltip = 'text')
Overall, it looks like most cohorts are quite similar, however, Cohorts 1 and 2 definitely have higher levels of excretion compared to the rest. For the 0h time point, it looks like participants BSS007 and BSS013 in Cohorts 1 and 2, respectively, made have made a mistake during their dietary restrictions too.
Let’s see how this plays out in our stats:
For ease of analysis, data will be stratified by time point and then analyzed across cohorts using an ANOVA. The interpretation of these results it that a significant p-value denotes a significant difference between at least 2 cohorts for that time point.
cohortstats <- urine_tidy %>%
filter(metabolite == 'SFN_Tot') %>%
group_by(time) %>%
nest() %>%
mutate(pval = map_dbl(data, function(x) anova(lm(uMol ~ cohort, data = x))$"Pr(>F)"[1])) %>%
dplyr::select(-data) %>%
rename('Time' = time, 'p-Value' = pval)
knitr::kable(cohortstats)
| Time | p-Value |
|---|---|
| 0 | 0.3288060 |
| 3 | 0.2021590 |
| 6 | 0.0007957 |
| 24 | 0.0001501 |
| 48 | 0.0000024 |
| 72 | 0.0014641 |
It looks like at our later time points we get see significant differences. From looking at our plots, I can assume that this is being driven by cohorts 1 and 2 which were fed higher amounts of sulforaphane than the other cohorts. Let’s verify this by looking at the results of the linear model we made to do our ANOVA.
To interpret this output, we look at the Coefficients section of the output. (Intercept) is Cohort 1 and Estimate for this coefficient is the estimated mean of this cohort for that time point. For the other cohorts, Estimate represents the difference between its mean and the mean of Cohort 1. Pr(>|t|) is the p-value resulting from a t-test between each cohort and Cohort 1 (the (Intercept) term) - (ignore the p-value associated with the (Intercept) term). If a coefficient (Cohort) is found to be significant, it can be interpreted that the mean in total SFN metabolites excreted between that Cohort and Cohort 1 are significantly different.
cdata_list <- urine_tidy %>%
filter(metabolite == 'SFN_Tot') %>%
group_by(time) %>%
nest()
walk2(.x = cdata_list$data,
.y = list('0h', '3h', '6h', '24h', '48h', '72h'),
function(x,y){
print(y)
print(summary(lm(uMol ~ cohort, data = x)))
})
## [1] "0h"
##
## Call:
## lm(formula = uMol ~ cohort, data = x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.37942 -0.00285 0.00000 0.00000 1.13825
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.3794 0.1225 3.098 0.0042 **
## cohort2 -0.3025 0.1732 -1.746 0.0910 .
## cohort3 -0.3710 0.1732 -2.142 0.0404 *
## cohort4 -0.3794 0.1581 -2.400 0.0228 *
## cohort5 -0.3794 0.1871 -2.028 0.0515 .
## cohort6 -0.3766 0.1581 -2.382 0.0238 *
## cohort7 -0.3794 0.1500 -2.530 0.0169 *
## cohort8 -0.3794 0.1871 -2.028 0.0515 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2449 on 30 degrees of freedom
## Multiple R-squared: 0.2199, Adjusted R-squared: 0.03786
## F-statistic: 1.208 on 7 and 30 DF, p-value: 0.3288
##
## [1] "3h"
##
## Call:
## lm(formula = uMol ~ cohort, data = x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -23.901 -9.074 -2.494 6.986 29.597
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 48.139 7.200 6.686 2.47e-07 ***
## cohort2 -9.384 10.183 -0.922 0.3644
## cohort3 -13.991 10.183 -1.374 0.1800
## cohort4 -18.596 9.660 -1.925 0.0641 .
## cohort5 -26.478 10.999 -2.407 0.0227 *
## cohort6 -24.012 9.296 -2.583 0.0151 *
## cohort7 -21.234 8.819 -2.408 0.0226 *
## cohort8 -21.374 10.999 -1.943 0.0617 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14.4 on 29 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.2675, Adjusted R-squared: 0.09069
## F-statistic: 1.513 on 7 and 29 DF, p-value: 0.2022
##
## [1] "6h"
##
## Call:
## lm(formula = uMol ~ cohort, data = x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -31.232 -14.307 -3.131 10.101 56.011
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 75.04 10.08 7.443 2.7e-08 ***
## cohort2 12.92 14.26 0.906 0.37215
## cohort3 -34.07 14.26 -2.390 0.02334 *
## cohort4 -40.77 13.01 -3.133 0.00385 **
## cohort5 -26.52 15.40 -1.722 0.09536 .
## cohort6 -45.71 13.01 -3.512 0.00143 **
## cohort7 -40.93 12.35 -3.315 0.00240 **
## cohort8 -30.47 15.40 -1.979 0.05710 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 20.16 on 30 degrees of freedom
## Multiple R-squared: 0.5374, Adjusted R-squared: 0.4294
## F-statistic: 4.978 on 7 and 30 DF, p-value: 0.0007957
##
## [1] "24h"
##
## Call:
## lm(formula = uMol ~ cohort, data = x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -34.500 -7.647 -0.751 9.526 36.503
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 83.641 8.695 9.619 1.12e-10 ***
## cohort2 -6.444 12.297 -0.524 0.60410
## cohort3 -41.513 12.297 -3.376 0.00205 **
## cohort4 -47.142 11.225 -4.200 0.00022 ***
## cohort5 -47.374 13.282 -3.567 0.00124 **
## cohort6 -52.518 11.225 -4.679 5.77e-05 ***
## cohort7 -50.858 10.649 -4.776 4.39e-05 ***
## cohort8 -37.519 13.282 -2.825 0.00833 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 17.39 on 30 degrees of freedom
## Multiple R-squared: 0.5919, Adjusted R-squared: 0.4967
## F-statistic: 6.217 on 7 and 30 DF, p-value: 0.0001501
##
## [1] "48h"
##
## Call:
## lm(formula = uMol ~ cohort, data = x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.1168 -1.8467 0.1342 1.7926 4.9284
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14.894 1.393 10.689 9.48e-12 ***
## cohort2 -2.503 1.971 -1.270 0.213690
## cohort3 -8.655 1.971 -4.392 0.000129 ***
## cohort4 -8.848 1.799 -4.919 2.93e-05 ***
## cohort5 -7.369 2.128 -3.462 0.001632 **
## cohort6 -11.503 1.799 -6.395 4.65e-07 ***
## cohort7 -10.978 1.707 -6.433 4.18e-07 ***
## cohort8 -9.771 2.128 -4.591 7.38e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.787 on 30 degrees of freedom
## Multiple R-squared: 0.6978, Adjusted R-squared: 0.6273
## F-statistic: 9.896 on 7 and 30 DF, p-value: 2.403e-06
##
## [1] "72h"
##
## Call:
## lm(formula = uMol ~ cohort, data = x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.08313 -0.43230 -0.04454 0.36891 2.85011
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.1035 0.4780 6.493 3.55e-07 ***
## cohort2 -0.8936 0.6760 -1.322 0.196200
## cohort3 -2.2756 0.6760 -3.366 0.002101 **
## cohort4 -2.3524 0.6171 -3.812 0.000638 ***
## cohort5 -1.9318 0.7302 -2.646 0.012851 *
## cohort6 -2.7888 0.6171 -4.519 9.02e-05 ***
## cohort7 -2.6567 0.5854 -4.538 8.56e-05 ***
## cohort8 -1.9976 0.7302 -2.736 0.010348 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.956 on 30 degrees of freedom
## Multiple R-squared: 0.5152, Adjusted R-squared: 0.4021
## F-statistic: 4.554 on 7 and 30 DF, p-value: 0.001464
As suspected, Cohorts 1 and 2 are higher than all other cohorts at all time points. Let’s remove Cohorts 1 and 2 from our data and then try again to see if it still comes out as significantly different.
cohortstats_2 <- urine_tidy %>%
#Filter down to only total sulforaphane content
filter(metabolite == 'SFN_Tot') %>%
#Remove cohorts 1 and 2
filter(!cohort %in% c(1,2)) %>%
group_by(time) %>%
nest() %>%
mutate(pval = map_dbl(data, function(x) anova(lm(uMol ~ cohort, data = x))$"Pr(>F)"[1])) %>%
dplyr::select(-data) %>%
rename('Time' = time, 'p-Value' = pval)
knitr::kable(cohortstats_2)
| Time | p-Value |
|---|---|
| 0 | 0.3210257 |
| 3 | 0.8174697 |
| 6 | 0.6086423 |
| 24 | 0.7824433 |
| 48 | 0.2127658 |
| 72 | 0.2650680 |
As expected, Cohorts 1 and 2 were driving the significance that we observed earlier.
How do we want to handle these cohorts for future analyses?
Next we want to see the metabolism of SFN-NAC and SFN-NIT in our participants. To control for variable urine production between individuals, µM (as reported by Carmen) has been converted to µMol recovered in urine using urine weight as a proxy for volume. We will start by looking at the metabolism of SFN-NAC by plotting it by both subject, to see inter-individual variation, and by Cohort to see means across cohorts.
Hovering over any data point will give you more information about that point and the plot can be zoomed in on. The grey bars represent the mean at each timepoint.
NACall_sub <- ggplot(urine_clean, aes(x = time, y = SFN_NAC, group = subject_id, color = subject_id)) +
geom_bar(aes(group = time),stat = 'summary', fun = mean, fill = 'grey', color = 'black', alpha = 0.5) +
geom_point(size = 2, aes(text = paste0('Participant ID: ', subject_id, '\n',
'Cohort: ', cohort, '\n',
'Time Point: ', time, ' hours \n',
'Total Excretion: ', SFN_NAC, ' µMol \n'))) +
geom_path(size = 0.5) +
theme_minimal_hgrid() +
ylab('SFN-NAC (µmol)') +
xlab('Time (Hours)') +
ggtitle('SFN-NAC Excretion (µMol) by Participant') +
scale_color_discrete(name = 'Participant \n ID')
plotly::ggplotly(NACall_sub, tooltip = 'text')
ser <- function(x){
sd(x)/sqrt(length(x))
}
summary_NAC <- urine_tidy %>%
filter(metabolite == 'SFN_NAC') %>%
group_by(time, cohort) %>%
drop_na() %>%
summarise(mean = round(mean(uMol, na.rm = TRUE),3),
SE = round(ser(uMol), 3),
n = n())
NACsum <- ggplot(summary_NAC, aes(x = time, y = mean, fill = cohort)) +
geom_errorbar(aes(ymin = mean - SE, ymax = mean+SE,
text = paste('Standard Error: ±', SE, ' µMol')), width = 0.2) +
geom_col(aes(text = paste0('Cohort: ', cohort, '\n',
'Time: ', time, '\n',
'Mean SFN-NAC: ', mean, ' µmol \n',
'n: ', n))) +
theme_minimal_hgrid() +
ylab('SFN-NAC (µmol)') +
xlab('Time (Hours)') +
facet_wrap(~cohort, ncol = 4) +
ggtitle('SFN-NAC Excretion by Cohort') +
theme(legend.position = 'none')
plotly::ggplotly(NACsum, tooltip = 'text')
Next we will look at SFN-NIT metabolism. To recap, in Lauren Atwell’s data we saw an increase in metabolism from 0h to 24 hours then for some participants there was a decline between 24 and 48 hours while other saw an increase.
NITall_sub <- ggplot(urine_clean, aes(x = time, y = SFN_NIT, group = subject_id, color = subject_id)) +
geom_bar(aes(group = time),stat = 'summary', fun = mean, fill = 'grey', color = 'black', alpha = 0.5) +
geom_point(size = 2, aes(text = paste0('Participant ID: ', subject_id, '\n',
'Cohort: ', cohort, '\n',
'Time Point: ', time, ' hours \n',
'SFN-NIT Excretion: ', SFN_NIT, ' µMol \n'))) +
geom_path(size = 0.5) +
theme_minimal_hgrid() +
ylab('SFN-NIT (µmol)') +
xlab('Time (Hours)') +
ggtitle('SFN-NIT Excretion (µMol) by Participant') +
scale_color_discrete(name = 'Participant \n ID')
plotly::ggplotly(NITall_sub, tooltip = 'text')
summary_NIT <- urine_tidy %>%
filter(metabolite == 'SFN_NIT') %>%
group_by(time, cohort) %>%
drop_na() %>%
summarise(mean = round(mean(uMol, na.rm = TRUE),3),
SE = round(ser(uMol), 3),
n = n())
NITsum <- ggplot(summary_NIT, aes(x = time, y = mean, fill = cohort)) +
geom_errorbar(aes(ymin = mean - SE, ymax = mean+SE,
text = paste('Standard Error: ±', SE, ' µMol')), width = 0.2) +
geom_col(aes(text = paste0('Cohort: ', cohort, '\n',
'Time: ', time, '\n',
'Mean SFN-NIT: ', mean, ' µmol \n',
'n: ', n))) +
theme_minimal_hgrid() +
ylab('SFN-NIT (µmol)') +
xlab('Time (Hours)') +
facet_wrap(~cohort, ncol = 4) +
theme(legend.position = 'none')
plotly::ggplotly(NITsum, tooltip = 'text')
Interestingly, it looks like pattern in SFN-NIT that we saw in Lauren Atwell’s data isn’t observed here. One explanation could be the use of urine weight as a proxy for volume as opposed to true volume, as we used in Lauren Atwell’s data. The pattern in SFN-NAC excretion is also different from what we observed in Lauren’ Atwell’s data. The excretion of SFN-NIT between at 24 hours is fairly similar to what we saw in Lauren Atwell’s study, however. In her study, participants consumed 200 µmol of SFN-equivalents and ~23 µmol of SFN-NIT was recovered in urine at 24 hours, while in our study our particpants consumed 100 µmol of SFN-equivalents at ~12 µmol of SFN-NIT was recovered at 24 hours.
Lastly, let’s see at what time points SFN-NIT and SFN-NAC excretion was significantly different compared to 0hr. To evaluate this, we will use a nested mixed effects model. We will be using the model formula: uMol ~ time + (1|cohort/subject_id)
Interpretation of the results: Compared to 0h, does this time point significantly differ compare in µmol recovered?
library(lme4)
test <- urine_tidy %>%
filter(metabolite == 'SFN_NIT')
#Fit our model:
lm_time <- urine_tidy %>%
filter(metabolite != 'SFN_Tot') %>%
group_by(metabolite) %>%
nest() %>%
#Run the model:
mutate(model = map(data, function(x) lmer(uMol ~ time + (1|cohort/subject_id), data = x))) %>%
#Run the contrasts across each time point
mutate(pval_3h = map_dbl(model, function(x) lmerTest::contest(x, c(0,1,0,0,0,0))$'Pr(>F)')) %>%
mutate(pval_6h = map_dbl(model, function(x) lmerTest::contest(x, c(0,0,1,0,0,0))$'Pr(>F)')) %>%
mutate(pval_24h = map_dbl(model, function(x) lmerTest::contest(x, c(0,0,0,1,0,0))$'Pr(>F)')) %>%
mutate(pval_48h = map_dbl(model, function(x) lmerTest::contest(x, c(0,0,0,0,1,0))$'Pr(>F)')) %>%
mutate(pval_72h = map_dbl(model, function(x) lmerTest::contest(x, c(0,0,0,0,0,1))$'Pr(>F)')) %>%
dplyr::select(-c(data, model)) %>%
rename('Metabolite' = metabolite, '3h p-Value' = pval_3h, '6h p-Value' = pval_6h, '24h p-Value' = pval_24h,
'48h p-Value' = pval_48h, '72h p-Value' = pval_72h)
#Round the data to make it more readable
lmt_rounded <- lm_time %>%
modify_if(is.numeric, round, 4)
knitr::kable(lmt_rounded)
| Metabolite | 3h p-Value | 6h p-Value | 24h p-Value | 48h p-Value | 72h p-Value |
|---|---|---|---|---|---|
| SFN | 0.0000 | 0.0000 | 0.0000 | 0.9943 | 0.9943 |
| SFN_CYS | 0.0000 | 0.0000 | 0.0000 | 0.8433 | 0.9819 |
| SFN_NAC | 0.0000 | 0.0000 | 0.0000 | 0.2817 | 0.8287 |
| SFN_CG | 0.6383 | 0.2571 | 0.0476 | 0.9956 | 0.9956 |
| SFN_GSH | 0.2243 | 0.0015 | 1.0000 | 1.0000 | 1.0000 |
| SFN_NIT | 0.0010 | 0.0000 | 0.0000 | 0.0000 | 0.4688 |
To address this problem we can take two approaches: 1) Find participants who excreted the highest cumulative amount SFN-NIT and SFN-NAC across all times points
We will use our first strategy to look at cumulative metabolite production of NAC and NIT across all time points.
cumsum_NAC <- urine_tidy %>%
filter(metabolite == 'SFN_NAC') %>%
drop_na() %>%
group_by(subject_id, cohort) %>%
nest() %>%
mutate(data = map_dbl(data, function(x) sum(x$uMol))) %>%
ungroup()
fillvec <- rep('', length(cumsum_NAC$data))
fillvec[which(cumsum_NAC$data %in% head(cumsum_NAC$data[order(cumsum_NAC$data, decreasing = TRUE)], 5))] <- 'Highest'
fillvec[which(cumsum_NAC$data %in% tail(cumsum_NAC$data[order(cumsum_NAC$data, decreasing = TRUE)], 5))] <- 'Lowest'
cumsum_NAC$maxmin <- fillvec
csoutNAC <- cumsum_NAC %>%
dplyr::select(subject_id, cohort, data, maxmin) %>%
filter(maxmin != '') %>%
arrange(subject_id) %>%
rename('Participant' = subject_id, 'Cohort' = cohort, 'Cumulative µMol Excreted' = data, 'Highest or Lowest Producer' = maxmin)
knitr::kable(csoutNAC, caption = 'Highest and Lowest Cumulative SFN-NAC Excreters')
| Participant | Cohort | Cumulative µMol Excreted | Highest or Lowest Producer |
|---|---|---|---|
| BSS003 | 1 | 150.42256 | Highest |
| BSS007 | 1 | 137.54865 | Highest |
| BSS009 | 2 | 156.76076 | Highest |
| BSS017 | 2 | 158.85693 | Highest |
| BSS035 | 4 | 138.58845 | Highest |
| BSS039 | 4 | 18.99498 | Lowest |
| BSS040 | 4 | 30.46430 | Lowest |
| BSS056 | 6 | 27.05526 | Lowest |
| BSS068 | 7 | 28.00599 | Lowest |
| BSS072 | 7 | 37.51804 | Lowest |
cumsum_NIT <- urine_tidy %>%
filter(metabolite == 'SFN_NIT') %>%
drop_na() %>%
group_by(subject_id, cohort) %>%
nest() %>%
mutate(data = map_dbl(data, function(x) sum(x$uMol))) %>%
ungroup()
fillvec <- rep('', length(cumsum_NIT$data))
fillvec[which(cumsum_NIT$data %in% head(cumsum_NIT$data[order(cumsum_NIT$data, decreasing = TRUE)], 5))] <- 'Highest'
fillvec[which(cumsum_NIT$data %in% tail(cumsum_NIT$data[order(cumsum_NIT$data, decreasing = TRUE)], 5))] <- 'Lowest'
cumsum_NIT$maxmin <- fillvec
csoutNIT <- cumsum_NIT %>%
dplyr::select(subject_id, cohort, data, maxmin) %>%
filter(maxmin != '') %>%
arrange(subject_id) %>%
rename('Participant' = subject_id, 'Cohort' = cohort, 'Cumulative µMol Excreted' = data, 'Highest or Lowest Producer' = maxmin)
knitr::kable(csoutNIT, caption = 'Highest and Lowest Cumulative SFN-NIT Excreters')
| Participant | Cohort | Cumulative µMol Excreted | Highest or Lowest Producer |
|---|---|---|---|
| BSS005 | 1 | 75.526580 | Highest |
| BSS006 | 1 | 56.647162 | Highest |
| BSS007 | 1 | 44.036687 | Highest |
| BSS009 | 2 | 68.164386 | Highest |
| BSS015 | 2 | 48.246656 | Highest |
| BSS039 | 4 | 5.412319 | Lowest |
| BSS040 | 4 | 7.045884 | Lowest |
| BSS052 | 6 | 5.923686 | Lowest |
| BSS054 | 6 | 7.821824 | Lowest |
| BSS058 | 6 | 9.887403 | Lowest |
For SFN-NAC, our top excreters are BSS017, BSS009, BSS003, BSS035, and BSS007.
For SFN-NIT, our top excreters are BSS005, BSS009, BSS006, BSS015, and BSS007.
There is quite a bit of overlap between our top NAC and NIT excreters and most of top excreters unsurprisingly come from Cohorts 1 and 2. BSS035, a top NAC excreter, is an interesting exception to this pattern.
Let’s plot our highest and lowest excreter from the cumulative method:
cs_final_NAC <- cumsum_NAC %>%
filter(maxmin != '') %>%
select(subject_id, maxmin) %>%
left_join(urine_tidy) %>%
filter(metabolite %in% c('SFN_NAC', 'SFN_NIT'))
cs_plot_r <- ggplot(cs_final_NAC, aes(x = time, y = uMol, color = subject_id, group = subject_id)) +
geom_point(size = 2, aes(text = paste0('Participant ID: ', subject_id, '\n',
'Cohort: ', cohort, '\n',
'Group: ', maxmin, '\n',
'Time Point: ', time, ' hours \n',
'Recovered: ', uMol, ' µMol \n'))) +
geom_path(size = 0.5) +
facet_wrap(~metabolite, ncol = 4) +
theme_minimal_hgrid() +
ylab('Metabolites Recovered (µmol)') +
xlab('Time (Hours)') +
ggtitle('High/Low SFN-NAC Excreters - Recovery') +
scale_color_discrete(name = 'Participant \n ID')
plotly::ggplotly(cs_plot_r, tooltip = 'text')
cssum <- cs_final_NAC %>%
group_by(subject_id, metabolite, maxmin) %>%
mutate(cumsum = cumsum(uMol))
csplot_t <- ggplot(cssum, aes(x = time, y = cumsum, color = subject_id, group = subject_id)) +
geom_point(size = 2, aes(text = paste0('Participant ID: ', subject_id, '\n',
'Cohort: ', cohort, '\n',
'Group: ', maxmin, '\n',
'Time Point: ', time, ' hours \n',
'Cumulative Excretion: ', cumsum, ' µMol \n'))) +
geom_path(aes(group = subject_id), size =0.5) +
facet_wrap(~metabolite, ncol = 4) +
theme_minimal_hgrid() +
ylab('Cumulative Recovery (µmol)') +
xlab('Time (Hours)') +
ggtitle('High/Low SFN-NAC Excreters - Cumulative') +
scale_color_discrete(name = 'Participant \n ID')
plotly::ggplotly(csplot_t, tooltip = 'text')
cs_final_NIT <- cumsum_NIT %>%
filter(maxmin != '') %>%
select(subject_id, maxmin) %>%
left_join(urine_tidy) %>%
filter(metabolite %in% c('SFN_NAC', 'SFN_NIT'))
cs_plot_r_nit <- ggplot(cs_final_NIT, aes(x = time, y = uMol, color = subject_id, group = subject_id)) +
geom_point(size = 2, aes(text = paste0('Participant ID: ', subject_id, '\n',
'Cohort: ', cohort, '\n',
'Group: ', maxmin, '\n',
'Time Point: ', time, ' hours \n',
'Recovered: ', uMol, ' µMol \n'))) +
geom_path(size = 0.5) +
facet_wrap(~metabolite, ncol = 4) +
theme_minimal_hgrid() +
ylab('Metabolites Recovered (µmol)') +
xlab('Time (Hours)') +
ggtitle('High/Low SFN-NIT Excreters - Recovery') +
scale_color_discrete(name = 'Participant \n ID')
plotly::ggplotly(cs_plot_r_nit, tooltip = 'text')
cssum_NIT <- cs_final_NIT %>%
group_by(subject_id, metabolite, maxmin) %>%
mutate(cumsum = cumsum(uMol))
csplot_t_nit <- ggplot(cssum_NIT, aes(x = time, y = cumsum, color = subject_id, group = subject_id)) +
geom_point(size = 2, aes(text = paste0('Participant ID: ', subject_id, '\n',
'Cohort: ', cohort, '\n',
'Group: ', maxmin, '\n',
'Time Point: ', time, ' hours \n',
'Cumulative Excretion: ', cumsum, ' µMol \n'))) +
geom_path(aes(group = subject_id), size = 0.5) +
facet_wrap(~metabolite, ncol = 4) +
theme_minimal_hgrid() +
ylab('Cumulative Recovery (µmol)') +
xlab('Time (Hours)') +
ggtitle('High/Low SFN-NIT Excreters - Cumulative') +
scale_color_discrete(name = 'Participant \n ID')
plotly::ggplotly(csplot_t_nit, tooltip = 'text')
Next we will search across each individual and find their maximum NAC and NIT excretion then search across all individuals to find the highest excretion.
maxminNAC <- urine_tidy %>%
filter(metabolite == 'SFN_NAC') %>%
drop_na() %>%
#Group by subject ID and break down into individual dataframes by participant
group_by(subject_id) %>%
nest() %>%
#Pull out the time point with max recovery for each metabolite and each individual
mutate(data = map(data, function(x) x[which(x$uMol == max(x$uMol)),])) %>%
#Put the dataframe back together
unnest(cols = data) %>%
arrange(uMol) %>%
ungroup()
fillvec <- rep('', length(maxminNAC$uMol))
fillvec[which(maxminNAC$uMol %in% head(maxminNAC$uMol[order(maxminNAC$uMol, decreasing = TRUE)], 5))] <- 'Highest'
fillvec[which(maxminNAC$uMol %in% tail(maxminNAC$uMol[order(maxminNAC$uMol, decreasing = TRUE)], 5))] <- 'Lowest'
maxminNAC$maxmin <- fillvec
outtableNAC <- maxminNAC %>%
dplyr::select(subject_id, cohort, time, uMol, maxmin) %>%
filter(maxmin != '') %>%
arrange(subject_id) %>%
rename('Participant' = subject_id, 'Cohort' = cohort, 'Time Recovered' = time, 'Receovered µMol' = uMol, 'Highest or Lowest Producer' = maxmin)
knitr::kable(outtableNAC, caption = 'Highest and Lowest SFN-NAC Producers')
| Participant | Cohort | Time Recovered | Receovered µMol | Highest or Lowest Producer |
|---|---|---|---|---|
| BSS003 | 1 | 3 | 55.76902 | Highest |
| BSS006 | 1 | 24 | 52.24617 | Highest |
| BSS007 | 1 | 6 | 54.78522 | Highest |
| BSS009 | 2 | 6 | 86.60635 | Highest |
| BSS017 | 2 | 24 | 57.61610 | Highest |
| BSS039 | 4 | 3 | 10.01200 | Lowest |
| BSS040 | 4 | 24 | 17.57776 | Lowest |
| BSS056 | 6 | 3 | 11.80669 | Lowest |
| BSS068 | 7 | 6 | 11.91840 | Lowest |
| BSS072 | 7 | 24 | 13.05457 | Lowest |
maxminNIT <- urine_tidy %>%
filter(metabolite == 'SFN_NIT') %>%
drop_na() %>%
#Group by subject ID and break down into individual dataframes by participant
group_by(subject_id) %>%
nest() %>%
#Pull out the time point with max recovery for each metabolite and each individual
mutate(data = map(data, function(x) x[which(x$uMol == max(x$uMol)),])) %>%
#Put the dataframe back together
unnest(cols = data) %>%
ungroup()
fillvec <- rep('', length(maxminNIT$uMol))
fillvec[which(maxminNIT$uMol %in% head(maxminNIT$uMol[order(maxminNIT$uMol, decreasing = TRUE)], 5))] <- 'Highest'
fillvec[which(maxminNIT$uMol %in% tail(maxminNIT$uMol[order(maxminNIT$uMol, decreasing = TRUE)], 5))] <- 'Lowest'
maxminNIT$maxmin <- fillvec
outtableNIT <- maxminNIT%>%
dplyr::select(subject_id, cohort, time, uMol, maxmin) %>%
filter(maxmin != '') %>%
arrange(subject_id) %>%
rename('Participant' = subject_id, 'Cohort' = cohort, 'Time Recovered' = time, 'Receovered µMol' = uMol, 'Highest or Lowest Producer' = maxmin)
knitr::kable(outtableNIT, caption = 'Highest and Lowest SFN-NIT Producers')
| Participant | Cohort | Time Recovered | Receovered µMol | Highest or Lowest Producer |
|---|---|---|---|---|
| BSS005 | 1 | 24 | 40.676706 | Highest |
| BSS006 | 1 | 24 | 31.925270 | Highest |
| BSS009 | 2 | 24 | 28.322472 | Highest |
| BSS015 | 2 | 24 | 23.726976 | Highest |
| BSS017 | 2 | 24 | 19.636344 | Highest |
| BSS039 | 4 | 3 | 1.778377 | Lowest |
| BSS040 | 4 | 24 | 3.905985 | Lowest |
| BSS052 | 6 | 24 | 2.766656 | Lowest |
| BSS054 | 6 | 24 | 3.817469 | Lowest |
| BSS064 | 7 | 24 | 4.703800 | Lowest |
Once again, we see quite a bit of overlap between our high NIT and NAC producers. The highest NAC producers are BSS009, BSS017, BSS003, BSS007, and BSS006. Similarly the highest NIT producers are BSS005, BSS006, BSS009, BSS015, and BSS017.
We can now plot our results to see how they look:
mx_final <- maxminNAC %>%
filter(maxmin != '') %>%
select(subject_id, maxmin) %>%
left_join(urine_tidy) %>%
filter(metabolite %in% c('SFN_NAC', 'SFN_NIT'))
mx_plot_r <- ggplot(mx_final, aes(x = time, y = uMol, color = subject_id, group = subject_id)) +
geom_point(size = 2, aes(text = paste0('Participant ID: ', subject_id, '\n',
'Cohort: ', cohort, '\n',
'Group: ', maxmin, '\n',
'Time Point: ', time, ' hours \n',
'Recovered: ', uMol, ' µMol \n'))) +
geom_path(size = 0.5) +
facet_wrap(~metabolite, ncol = 4) +
theme_minimal_hgrid() +
ylab('Metabolites Recovered (µmol)') +
xlab('Time (Hours)') +
ggtitle('High/Low SFN-NAC Excreters - Recovery') +
scale_color_discrete(name = 'Participant \n ID')
plotly::ggplotly(mx_plot_r, tooltip = 'text')
mxsum <- mx_final %>%
group_by(subject_id, metabolite, maxmin) %>%
mutate(cumsum = cumsum(uMol))
mxplot_t <- ggplot(mxsum, aes(x = time, y = cumsum, color = subject_id, group = subject_id)) +
geom_point(size = 2, aes(text = paste0('Participant ID: ', subject_id, '\n',
'Cohort: ', cohort, '\n',
'Group: ', maxmin, '\n',
'Time Point: ', time, ' hours \n',
'Recovered: ', uMol, ' µMol \n'))) +
geom_path(size = 0.5) +
facet_wrap(~metabolite, ncol = 4) +
theme_minimal_hgrid() +
ylab('Cumulative Recovery (µmol)') +
xlab('Time (Hours)') +
ggtitle('High/Low SFN-NIT Excreters - Cumulative') +
scale_color_discrete(name = 'Participant \n ID')
plotly::ggplotly(mxplot_t, tooltip = 'text')
mxfinal_NIT <- maxminNIT %>%
filter(maxmin != '') %>%
select(subject_id, maxmin) %>%
left_join(urine_tidy) %>%
filter(metabolite %in% c('SFN_NAC', 'SFN_NIT'))
mx_plot_r_nit <- ggplot(mxfinal_NIT, aes(x = time, y = uMol, color = subject_id, group = subject_id)) +
geom_point(size = 2, aes(text = paste0('Participant ID: ', subject_id, '\n',
'Cohort: ', cohort, '\n',
'Group: ', maxmin, '\n',
'Time Point: ', time, ' hours \n',
'Recovered: ', uMol, ' µMol \n'))) +
geom_path(size = 0.5) +
facet_wrap(~metabolite, ncol = 4) +
theme_minimal_hgrid() +
ylab('Metabolites Recovered (µmol)') +
xlab('Time (Hours)') +
ggtitle('High/Low SFN-NIT Excreters - Recovery') +
scale_color_discrete(name = 'Participant \n ID')
plotly::ggplotly(mx_plot_r_nit, tooltip = 'text')
mxsum_NIT <- mxfinal_NIT %>%
group_by(subject_id, metabolite, maxmin) %>%
mutate(cumsum = cumsum(uMol))
mxplot_t_nit <- ggplot(mxsum_NIT, aes(x = time, y = cumsum, color = subject_id, group = subject_id)) +
geom_point(size = 2, aes(text = paste0('Participant ID: ', subject_id, '\n',
'Cohort: ', cohort, '\n',
'Group: ', maxmin, '\n',
'Time Point: ', time, ' hours \n',
'Cumulative Excretion: ', cumsum, ' µMol \n'))) +
geom_path(aes(group = subject_id), size = 0.5) +
facet_wrap(~metabolite, ncol = 4) +
theme_minimal_hgrid() +
ylab('Cumulative Recovery (µmol)') +
xlab('Time (Hours)') +
ggtitle('High/Low SFN-NIT Excreters - Cumulative') +
scale_color_discrete(name = 'Participant \n ID')
plotly::ggplotly(mxplot_t_nit, tooltip = 'text')
Which individuals do want to investigate further for our metabolomics experiment? Which method do we want to use to select our participants? Cumulative excretion or maximum recovery? Do we want to focus on the NIT or NAC list?